Question 1

b

i

library(ggplot2)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)

#rm(list = ls())
filepath = "data/tempexam.csv"
temp_data = read.csv2(filepath)
summary(temp_data)
##      Dates           y            
##  Min.   :1830   Length:184        
##  1st Qu.:1876   Class :character  
##  Median :1922   Mode  :character  
##  Mean   :1922                     
##  3rd Qu.:1967                     
##  Max.   :2013
# Nadayara-Watson function
gaussian_kernel = function(x,h){
  return ((1/sqrt(2*pi*(h^2))) * exp(-0.5*(x/h)^2))
}

nw_estimator = function(x, observed_x, observed_y, h){
 weights = sapply(observed_x, function(xi) gaussian_kernel(abs(x-xi), h))
 weights = weights/sum(weights) # normalize weights
 y_hat = sum(weights * observed_y)
 return (y_hat)
}

x_train = as.matrix(temp_data$Dates)
y_train = as.matrix(as.numeric(temp_data$y))

y_hat = sapply(x_train, nw_estimator,observed_x = x_train, observed_y = y_train, 0.5)
final_temp_data = data.frame(x= x_train, y = y_train, y_hat=y_hat)

# Plot Nadayara-Watson estimators against the original data
ggplot(final_temp_data, aes(x = x, y = y)) +
  geom_line(color = "blue") +
  geom_line(aes(y = y_hat), color = "red") +
  labs(title = "Nadaraya-Watson Regression Smoothing h=0.5",
       x = "Year",
       y = "Temperature")

ii

# Leave one-out-cross-validation

loo_cv = function(data,bandwidths){
  
 cv_grid = matrix(data=NA, nrow=nrow(data), ncol = length(bandwidths)+1)
 cv_error_grid = matrix(data=NA, nrow=nrow(data), ncol = length(bandwidths)+1)
 
 for (i in 1:nrow(data)){
   training_set = data[-i, ]
   validation_set = data[i, ]
   x_train = as.matrix(data$Dates)
   y_train = as.matrix(as.numeric(data$y))
   x_validation = as.matrix(validation_set$Dates)
   y_validation = as.matrix(as.numeric(validation_set$y))
   cv_grid[i,1] = x_validation
   cv_error_grid[i, 1] = x_validation
   
   for( j in 1:length(bandwidths)){
     h = bandwidths[j]
     y_hat = sapply(x_validation, nw_estimator,observed_x = x_train, observed_y = y_train, h)
     cv_grid[i,j+1] = y_hat
     cv_error_grid[i, j+1] = (y_validation - y_hat)^2
   }
 }
 colnames(cv_grid) = c("x", paste0("h", 1:length(bandwidths)+1))
 mse = apply(cv_error_grid[,-1], 2, mean)
 return (list(fitted_values = cv_grid, errors = cv_error_grid, mse=mse))
}

bandwidths = seq(1,30, length.out=101)

loo_cv_result = loo_cv(temp_data,bandwidths)
fitted_vals = loo_cv_result$fitted_values
cv_errors = loo_cv_result$errors
cv_mse = loo_cv_result$mse
plot(x= bandwidths, y=cv_mse, type = "l", main="Leave on out cross validation", xlab="h")

optimal_bandwith = bandwidths[which.min(cv_mse)]

# Use optimal bandwith to plot estimate
y_hat = sapply(x_train, nw_estimator,observed_x = x_train, observed_y = y_train, optimal_bandwith)
final_temp_data = data.frame(x= x_train, y = y_train, y_hat=y_hat)

# Plot Nadayara-Watson estimators against the original data
ggplot(final_temp_data, aes(x = x, y = y)) +
  geom_line(color = "blue") +
  geom_line(aes(y = y_hat), color = "red") +
  labs(title = "Nadaraya-Watson Regression Smoothing LOO-CV h=1",
       x = "Year",
       y = "Temperature")

Question 2

b(i)

library(tidyverse)

#rm(list = ls())

prostate_data = read.table("data/prostate_data.txt")
head(prostate_data)
##       lcavol  lweight age      lbph svi       lcp gleason pgg45       lpsa
## 1 -0.5798185 2.769459  50 -1.386294   0 -1.386294       6     0 -0.4307829
## 2 -0.9942523 3.319626  58 -1.386294   0 -1.386294       6     0 -0.1625189
## 3 -0.5108256 2.691243  74 -1.386294   0 -1.386294       7    20 -0.1625189
## 4 -1.2039728 3.282789  58 -1.386294   0 -1.386294       6     0 -0.1625189
## 5  0.7514161 3.432373  62 -1.386294   0 -1.386294       6     0  0.3715636
## 6 -1.0498221 3.228826  50 -1.386294   0 -1.386294       6     0  0.7654678
##   train
## 1  TRUE
## 2  TRUE
## 3  TRUE
## 4  TRUE
## 5  TRUE
## 6  TRUE
in_sample_obs = prostate_data%>%as_tibble() %>%
                filter(train==TRUE)
out_of_sample_obs = prostate_data%>%as_tibble() %>%
                filter(train==FALSE)

regopt = function(tuning, Y, X, nitermax=20){
  lambda1 = tuning[1]
  lambda2 = tuning[2]
  
  p = ncol(X)
  beta  = rep(0, p)
  errortol = 1e-4
  
  A = NULL
  if(lambda1 == 0 && lambda2 == 0){
    # compute OLS Beta
    beta = ols_beta(Y,X)
  
  }else {
     for (i in 1:nitermax) {
       # Store previous beta for convergence check
       beta_prev = beta
       # Update each coefficient in turn
       for (l in 1:p){
          beta[l] = beta_gradient_desc(tuning, Y, X, beta,l) 
       }
       # Check for convergence
       if( max(abs(beta - beta_prev)) < errortol){
         break
       }
     }
  }
  A = elastic_net_obj_function(tuning, Y,X, beta) 
  return(list(errortol= errortol, beta = beta, A = A))
  
}

ols_beta = function(Y,X){
  beta_hat = solve(t(X)%*%X)%*%t(X)%*%Y
  return (beta_hat)
}

compute_A <- function(Y, X, beta, lambda1, lambda2) {
  residual_sum_squares <- sum((Y - X %*% beta)^2)
  lasso_penalty <- lambda1 * sum(abs(beta))
  ridge_penalty <- lambda2 * sum(beta^2)
  A <- residual_sum_squares + lasso_penalty + ridge_penalty
  return(A)
}
  
elastic_net_obj_function = function(tuning, Y,X, beta){
  lambda1 = tuning[1]
  lambda2 = tuning[2]
  one_transponse = t(matrix(1, nrow=length(beta), ncol=1))
  rss = t((Y-X%*%beta))%*%(Y-X%*%beta)
  lasso_penalty = lambda1 * one_transponse %*% abs(beta)
  ridge_penalty = lambda2 * t(beta)%*%beta
  A = rss + lasso_penalty + ridge_penalty
  return(A)
}
  

beta_gradient_desc = function(tuning, Y,X, beta, l){
  lambda1     = tuning[1]
  lambda2     = tuning[2]
  r           = Y - X[, -l]%*%beta[-l] # exclude effect of beta_l
  numerator   = sum(X[, l] * r) - (lambda1 / 2) * sign(beta[l]) - lambda2 * beta[l]
  denominator = sum(X[, l]^2)
  beta_new    = numerator / denominator
  return (beta_new)
}



in_sample_obs_std = scale(in_sample_obs[,-10], center=TRUE, scale=TRUE)%>%as_tibble()
Y_train = as.matrix(in_sample_obs_std$lpsa)
colnames(Y_train) = "lpsa"
X_train = as.matrix(in_sample_obs_std[, -9])
# Execute regopt
tuning <- c(0.1, 0.1)
result <- regopt(tuning, Y_train, X_train, nitermax = 100)
print(result)
## $errortol
## [1] 1e-04
## 
## $beta
## [1]  0.58827529  0.24202016 -0.11607918  0.17445798  0.25443318 -0.23146209
## [7] -0.01320665  0.22296661
## 
## $A
##          lpsa
## lpsa 20.41962

b(ii)

loo_cv = function(Y,X, lambdas, lasso_regression=TRUE){
  
  n = nrow(Y)
  p = ncol(X)
  # Setup up cross-validation grid matrix
  cv_grid = matrix(data=NA, nrow=nrow(X), ncol = length(lambdas)) 
  # Setup cross validation error grid matrix
  cv_error_grid = matrix(data=NA, nrow=nrow(X), ncol = length(lambdas))

  for( i in 1:n){
    # Leave one observation out
    x_training_set = X[-i,]
    x_validation_set = X[i,]
    y_training_set  = Y[-i,]
    y_validation_set = Y[i,]
    for (j in 1: length(lambdas)){
      result = NA
      lambda = lambdas[j]
      if(lasso_regression){
        tuning = c(lambda, 0)
        result = regopt(tuning, y_training_set, x_training_set, nitermax = 20)
      }else{
        tuning = c(0, lambda)
        result = regopt(tuning, y_training_set, x_training_set, nitermax = 20)
      }
       y_hat = x_validation_set%*%result$beta
       cv_grid[i,j] = y_hat
       cv_error_grid[i, j] = (y_validation_set - y_hat)^2
    }
  }
  colnames(cv_grid) = c(paste0("lambda", c(lambdas)))
  colnames(cv_error_grid) = c(paste0("lambda", c(lambdas)))
  mse = apply(cv_error_grid, 2, mean)
  return (list(fitted_values = cv_grid, errors = cv_error_grid, mse=mse))
}

# Lambda range [0,5] across 100 points
#lambdas = 10^seq(0,log10(5), length.out=100)
lambdas = seq(0,5, length.out=100)

# Cross-validation for Lasso Regression
lasso_loo_cv_result = loo_cv(Y_train,X_train,lambdas,lasso_regression=TRUE)
lasso_reg_fitted_vals = lasso_loo_cv_result$fitted_values
lasso_reg_errors = lasso_loo_cv_result$errors
lasso_reg_mse = lasso_loo_cv_result$mse
optimal_lasso_lambda = lambdas[which.min(lasso_reg_mse)]
optimal_lasso_lambda 
## [1] 1.010101
# Lasso Coefficients
tuning = c(optimal_lasso_lambda,0 )
lasso_result <- regopt(tuning, Y_train, X_train, nitermax = 20)
lasso_coefficients = lasso_result$beta
lasso_coefficients
## [1]  0.56923536  0.23815764 -0.10273133  0.16689761  0.24120637 -0.19010916
## [7] -0.00630515  0.19572855
plot(x= lambdas, y=lasso_reg_mse, type = "l", main="Lasso Regression LOOCV", xlab="lambda", ylab="CV MSE")

# Cross-validation for Ridge Regression
ridge_loo_cv_result = loo_cv(Y_train,X_train,lambdas,lasso_regression=FALSE)
ridge_reg_fitted_vals = ridge_loo_cv_result$fitted_values
ridge_reg_errors = ridge_loo_cv_result$errors
ridge_reg_mse = ridge_loo_cv_result$mse
optimal_ridge_lambda = lambdas[which.min(ridge_reg_mse)]
optimal_ridge_lambda 
## [1] 3.434343
# Ridge Coefficients
tuning = c(0,optimal_ridge_lambda )
ridge_result <- regopt(tuning, Y_train, X_train, nitermax = 20)
ridge_coefficients = ridge_result$beta
ridge_coefficients
## [1]  0.527830561  0.239068732 -0.098759272  0.169576485  0.241391108
## [6] -0.159391458  0.004474402  0.184120639
plot(x= lambdas, y=ridge_reg_mse, type = "l", main="Ridge Regression LOOCV", xlab="lambda", ylab="CV MSE")

library(corrplot)
## corrplot 0.92 loaded
corr_matrix = cor(as.matrix(X_train))
col <- colorRampPalette(c("#BB4444", "#EE9988", "#FFFFFF", "#77AADD", "#4477AA"))
corrplot(corr_matrix, method = "color", col = col(200),
        type = "full", order = "original",
        addCoef.col = "white",
        tl.col = "black", tl.srt = 45,
       tl.pos = "lt",
       tl.cex = 0.6, cl.cex = 0.7,
        number.cex = 0.5
       )

b (iii)

library(boot)
set.seed(123)
bootstrap_lasso_fit = function(data, indices){
 data = data[indices,]
 Y_train = as.matrix(data$lpsa)
 colnames(Y_train) = "lpsa"
 X_train = as.matrix(data[, -9])
 tuning = c(optimal_lasso_lambda,0 )
 lasso_result <- regopt(tuning, Y_train, X_train, nitermax = 20)
 lasso_coefficients = lasso_result$beta
 return(lasso_coefficients)
}

boot_results = boot(data = in_sample_obs_std, statistic = bootstrap_lasso_fit, R = 1000)
bootstrap_estimates <- boot_results$t
boot_results
## 
## ORDINARY NONPARAMETRIC BOOTSTRAP
## 
## 
## Call:
## boot(data = in_sample_obs_std, statistic = bootstrap_lasso_fit, 
##     R = 1000)
## 
## 
## Bootstrap Statistics :
##        original        bias    std. error
## t1*  0.56923536 -0.0129107952  0.11047961
## t2*  0.23815764 -0.0004787677  0.09306902
## t3* -0.10273133  0.0070653563  0.07072224
## t4*  0.16689761  0.0080934676  0.09351119
## t5*  0.24120637  0.0002915267  0.10626619
## t6* -0.19010916  0.0121471967  0.10396465
## t7* -0.00630515  0.0032717850  0.09706869
## t8*  0.19572855  0.0011523911  0.10114002
compute_ci <- function(bootstrap_estimates, level = 0.95) {
  ci_lower <- apply(bootstrap_estimates, 2, quantile, probs = (1 - level) / 2)
  ci_upper <- apply(bootstrap_estimates, 2, quantile, probs = 1 - (1 - level) / 2)
  return(data.frame(Lower = ci_lower, Upper = ci_upper))
}

# Compute 95% confidence intervals for each coefficient
confidence_intervals <- compute_ci(bootstrap_estimates)
print(confidence_intervals)
##          Lower        Upper
## 1  0.325635829 0.7635247034
## 2  0.057948344 0.4199194687
## 3 -0.216830483 0.0509321832
## 4  0.004149456 0.3749541289
## 5  0.007029293 0.4393423690
## 6 -0.387586053 0.0004771296
## 7 -0.207443099 0.1875128709
## 8  0.014446424 0.4131771040

Question 3

library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
library(coda)
#rm(list = ls())

patient_profiles = read.csv2(file="data/profiles2024.csv", header=TRUE)
patient_profiles = as.matrix(patient_profiles[1:25,])

# Initial Settings
n = 25
T = 7
p = 3
iterations = 30000
burn_in = 10000


# Chain Starting values 
sigma_e2 = 1
sigma_phi2 = 2
phi = matrix(0, nrow=n, ncol=4)

# prior values
i1 = 2
i2 = 0.1
i3 = 2
i4 = 0.001

# Store initial chain values

sigma_e2_chain   = numeric(iterations)
sigma_phi2_chain = numeric(iterations)
phi_chain        = array(0, dim=c(n, 4, iterations))
# b(x) design matrix
b = function (x) { x* log(x) }
eta_k = c(0.5, 10, 25)
t_j   = c(0,3,7,14,21,28,42)
X_i = matrix(0, nrow=T, ncol=4)
X_i[,1] = 1

for ( j in 1:T){
  X_i[j, 2:4] = b(abs(t_j[j]-eta_k))
}

#------------------------------------- Perform Gibbs sampling-----------------------------------------------

for (k in 1:iterations){
  
  # Update sigma_e2
  patient_sum_of_squares = 0
  for ( i in 1:n){
    patient_sum_of_squares = patient_sum_of_squares + sum((patient_profiles[i, ]- X_i%*%phi[i,])^2)
  }
  sigma_e2 = 1/ rgamma(1, i1 + n*T/2, i2+ patient_sum_of_squares/2)
  
  # Update sigma_phi2
  sum_phi_squares = sum(phi^2)
  sigma_phi2 = 1 / rgamma(1, i3 + n * (p+1)/ 2, i4 + sum_phi_squares)
  
  # update phi_i
  for ( i in 1:n ){
    Sigma_phi <- solve((1/sigma_e2) * t(X_i) %*% X_i + diag(1/sigma_phi2, 4))
    mu_phi <- Sigma_phi %*% ((1/sigma_e2) * t(X_i) %*% patient_profiles[i, ])
    phi[i, ] <- mvrnorm(1, mu_phi, Sigma_phi)
  }
  # Store Markov Chain Draws
  sigma_e2_chain[k] <- sigma_e2
  sigma_phi2_chain[k] <- sigma_phi2
  phi_chain[, , k] <- phi
}

# ---------------------- Trace Plots and Convergence Statistics ----------------------------
# Remove burn-in samples
sigma_e2_chain <- sigma_e2_chain[(burn_in+1):iterations]
sigma_phi2_chain <- sigma_phi2_chain[(burn_in+1):iterations]
phi_chain <- phi_chain[, , (burn_in+1):iterations]

# Convergence diagnostics
par(mfrow = c(2, 2), mar = c(2, 2, 2, 1))
plot(as.mcmc(sigma_e2_chain), main = "Trace Plot for sigma_e2", xlab = "Iteration", ylab = "Value")

plot(as.mcmc(sigma_phi2_chain), main = "Trace Plot for sigma_phi2", xlab = "Iteration", ylab = "Value")

# Convergence diagnostics for phi parameters
par(mfrow = c(25, 4), mar = c(2, 2, 2, 1), oma = c(4, 4, 2, 1), cex.main = 0.7)
for (i in 1:25) {
  for (j in 1:4) {
    plot(as.mcmc(phi_chain[i, j, ]), 
         main = paste("phi[", i, ",", j, "]", sep = ""), 
         xlab = "Iteration", 
         ylab = "Value", 
         col.main = "blue", 
         cex.main = 0.7)
  }
}

par(mfrow = c(1, 1))  # Reset to default



# Extract posterior means and 95% credible intervals for patient 15
posterior_means <- apply(phi_chain[15, , ], 1, mean)
credible_intervals <- apply(phi_chain[15, , ], 1, quantile, probs = c(0.025, 0.975))

#---------------------------------- Analyses of 15th Observed Profile --------------------------------

# Plot the 15th observed profile
observed_profile <- patient_profiles[15, ]
predicted_profile <- X_i %*% posterior_means
predicted_profile_lower <- X_i %*% credible_intervals[1, ]
predicted_profile_upper <- X_i %*% credible_intervals[2, ]

plot(t_j, observed_profile, type = 'b', col = 'red', ylim = c(min(observed_profile, predicted_profile_lower), max(observed_profile, predicted_profile_upper)), ylab = 'z_ij', xlab = 't_j')
lines(t_j, predicted_profile, type = 'b', col = 'black')
lines(t_j, predicted_profile_lower, type = 'b', col = 'blue', lty = 2)
lines(t_j, predicted_profile_upper, type = 'b', col = 'blue', lty = 2)
legend("topleft", legend = c("Observed Profile", "Predicted Profile", "95% CI Lower", "95% CI Upper"), col = c("red", "black", "blue", "blue"), lty = c(1, 1, 2, 2), pch = c(1, 1, NA, NA), pt.cex = 1, cex = 0.8)